{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AGE</th>\n",
       "      <th>GENDER</th>\n",
       "      <th>MARRIAGE</th>\n",
       "      <th>EDU_EXPERIENCE</th>\n",
       "      <th>WORK_SIZE</th>\n",
       "      <th>WORK_POWER</th>\n",
       "      <th>IS_ILLEGAL_HIS</th>\n",
       "      <th>CURR_FREEZE_VALUE</th>\n",
       "      <th>GRADUATE_YEAR</th>\n",
       "      <th>OCCUPATION</th>\n",
       "      <th>OCCUPATION_TYPE</th>\n",
       "      <th>VIP_FLAG</th>\n",
       "      <th>GRAY_FLAG</th>\n",
       "      <th>FIVE_CLASS_TYPE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15735</th>\n",
       "      <td>51</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>99</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15741</th>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>60</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15753</th>\n",
       "      <td>45</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>70</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>9</td>\n",
       "      <td>z</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15788</th>\n",
       "      <td>41</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>70</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9</td>\n",
       "      <td>z</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15797</th>\n",
       "      <td>42</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>70</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       AGE  GENDER  MARRIAGE  EDU_EXPERIENCE  WORK_SIZE  WORK_POWER  \\\n",
       "15735   51       1         2              99          2           1   \n",
       "15741   56       1         2              60          2           1   \n",
       "15753   45       1         2              70          2           1   \n",
       "15788   41       1         2              70          2           1   \n",
       "15797   42       1         3              70          3           1   \n",
       "\n",
       "       IS_ILLEGAL_HIS  CURR_FREEZE_VALUE  GRADUATE_YEAR  OCCUPATION  \\\n",
       "15735             2.0                0.0            4.0           9   \n",
       "15741             2.0                0.0            4.0           9   \n",
       "15753             2.0                0.0            3.0           9   \n",
       "15788             2.0                0.0            4.0           9   \n",
       "15797             2.0                0.0            4.0           9   \n",
       "\n",
       "      OCCUPATION_TYPE  VIP_FLAG  GRAY_FLAG  FIVE_CLASS_TYPE  \n",
       "15735               5         0          0                0  \n",
       "15741               5         0          0                0  \n",
       "15753               z         0          0                0  \n",
       "15788               z         0          0                0  \n",
       "15797               5         0          0                1  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the customer table; the first CSV column becomes the row index.\n",
    "file_path = './test2.csv'\n",
    "data = pd.read_csv(\n",
    "    file_path,\n",
    "    index_col=0,\n",
    ")\n",
    "\n",
    "# Preview the first rows to sanity-check the load.\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((504, 10), (504,))"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Feature groups, split by how each column is handled further down:\n",
    "# numerical and binary columns are used as-is, categorical ones are one-hot encoded.\n",
    "numerical = [\n",
    "    'AGE',\n",
    "    'WORK_SIZE',\n",
    "    'CURR_FREEZE_VALUE',\n",
    "    'GRADUATE_YEAR',\n",
    "]\n",
    "categorical = [\n",
    "    'EDU_EXPERIENCE',\n",
    "    'MARRIAGE',\n",
    "    'OCCUPATION',\n",
    "    'OCCUPATION_TYPE',\n",
    "]\n",
    "binary = [\n",
    "    'GENDER',\n",
    "    'WORK_POWER',\n",
    "]\n",
    "\n",
    "# Raw feature matrix (numerical, then categorical, then binary columns) and the target.\n",
    "train_X = data[[*numerical, *categorical, *binary]]\n",
    "train_Y = data['FIVE_CLASS_TYPE']\n",
    "\n",
    "(train_X.shape, train_Y.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "字段|中文|类型\n",
    "--|--|--\n",
    "AGE|年龄|数值\n",
    "WORK_SIZE|劳动人口数|数值\n",
    "CURR_FREEZE_VALUE|账户冻结金额|数值\n",
    "GRADUATE_YEAR|工作年限|数值\n",
    "EDU_EXPERIENCE|最高学历|类别\n",
    "MARRIAGE|结婚|类别\n",
    "OCCUPATION|职务|类别\n",
    "OCCUPATION_TYPE|职业类型|类别\n",
    "GENDER|性别|二值\n",
    "WORK_POWER|劳动能力|二值\n",
    "IS_ILLEGAL_HIS|是否非法|删除\n",
    "VIP_FLAG|白名单客户|删除\n",
    "GRAY_FLAG|灰名单客户|删除\n",
    "FIVE_CLASS_TYPE|五级分类|目标值\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 对类别型变量进行 One-hot 编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>EDU_EXPERIENCE_10</th>\n",
       "      <th>EDU_EXPERIENCE_20</th>\n",
       "      <th>EDU_EXPERIENCE_30</th>\n",
       "      <th>EDU_EXPERIENCE_40</th>\n",
       "      <th>EDU_EXPERIENCE_50</th>\n",
       "      <th>EDU_EXPERIENCE_60</th>\n",
       "      <th>EDU_EXPERIENCE_70</th>\n",
       "      <th>EDU_EXPERIENCE_80</th>\n",
       "      <th>EDU_EXPERIENCE_90</th>\n",
       "      <th>EDU_EXPERIENCE_99</th>\n",
       "      <th>...</th>\n",
       "      <th>OCCUPATION_4</th>\n",
       "      <th>OCCUPATION_9</th>\n",
       "      <th>OCCUPATION_TYPE_0</th>\n",
       "      <th>OCCUPATION_TYPE_1</th>\n",
       "      <th>OCCUPATION_TYPE_3</th>\n",
       "      <th>OCCUPATION_TYPE_4</th>\n",
       "      <th>OCCUPATION_TYPE_5</th>\n",
       "      <th>OCCUPATION_TYPE_6</th>\n",
       "      <th>OCCUPATION_TYPE_y</th>\n",
       "      <th>OCCUPATION_TYPE_z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15735</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15741</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15753</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15788</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15797</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       EDU_EXPERIENCE_10  EDU_EXPERIENCE_20  EDU_EXPERIENCE_30  \\\n",
       "15735                  0                  0                  0   \n",
       "15741                  0                  0                  0   \n",
       "15753                  0                  0                  0   \n",
       "15788                  0                  0                  0   \n",
       "15797                  0                  0                  0   \n",
       "\n",
       "       EDU_EXPERIENCE_40  EDU_EXPERIENCE_50  EDU_EXPERIENCE_60  \\\n",
       "15735                  0                  0                  0   \n",
       "15741                  0                  0                  1   \n",
       "15753                  0                  0                  0   \n",
       "15788                  0                  0                  0   \n",
       "15797                  0                  0                  0   \n",
       "\n",
       "       EDU_EXPERIENCE_70  EDU_EXPERIENCE_80  EDU_EXPERIENCE_90  \\\n",
       "15735                  0                  0                  0   \n",
       "15741                  0                  0                  0   \n",
       "15753                  1                  0                  0   \n",
       "15788                  1                  0                  0   \n",
       "15797                  1                  0                  0   \n",
       "\n",
       "       EDU_EXPERIENCE_99        ...          OCCUPATION_4  OCCUPATION_9  \\\n",
       "15735                  1        ...                     0             1   \n",
       "15741                  0        ...                     0             1   \n",
       "15753                  0        ...                     0             1   \n",
       "15788                  0        ...                     0             1   \n",
       "15797                  0        ...                     0             1   \n",
       "\n",
       "       OCCUPATION_TYPE_0  OCCUPATION_TYPE_1  OCCUPATION_TYPE_3  \\\n",
       "15735                  0                  0                  0   \n",
       "15741                  0                  0                  0   \n",
       "15753                  0                  0                  0   \n",
       "15788                  0                  0                  0   \n",
       "15797                  0                  0                  0   \n",
       "\n",
       "       OCCUPATION_TYPE_4  OCCUPATION_TYPE_5  OCCUPATION_TYPE_6  \\\n",
       "15735                  0                  1                  0   \n",
       "15741                  0                  1                  0   \n",
       "15753                  0                  0                  0   \n",
       "15788                  0                  0                  0   \n",
       "15797                  0                  1                  0   \n",
       "\n",
       "       OCCUPATION_TYPE_y  OCCUPATION_TYPE_z  \n",
       "15735                  0                  0  \n",
       "15741                  0                  0  \n",
       "15753                  0                  1  \n",
       "15788                  0                  1  \n",
       "15797                  0                  0  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-hot encode every categorical column; the generated columns are named\n",
    "# '<column>_<value>'. The indicator dtype is pinned to uint8 (the historical\n",
    "# default) because pandas >= 2.0 returns bool indicators, which\n",
    "# DataFrame.describe() leaves out when the frame also holds numeric columns.\n",
    "data_dummies = pd.get_dummies(\n",
    "    data[categorical],\n",
    "    columns=categorical,\n",
    "    dtype=np.uint8,\n",
    ")\n",
    "data_dummies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(504, 34)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Final feature matrix: the untouched numerical/binary columns followed by the dummies.\n",
    "train_X = pd.concat(\n",
    "    [data[numerical + binary], data_dummies],\n",
    "    axis='columns',\n",
    ")\n",
    "\n",
    "# Full training table: all features with the target appended as the last column.\n",
    "train = pd.concat(\n",
    "    [train_X, train_Y],\n",
    "    axis='columns',\n",
    ")\n",
    "\n",
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AGE</th>\n",
       "      <td>504.0</td>\n",
       "      <td>44.952381</td>\n",
       "      <td>9.088132</td>\n",
       "      <td>21.0</td>\n",
       "      <td>38.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>75.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WORK_SIZE</th>\n",
       "      <td>504.0</td>\n",
       "      <td>2.041667</td>\n",
       "      <td>0.997138</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CURR_FREEZE_VALUE</th>\n",
       "      <td>504.0</td>\n",
       "      <td>17843.253968</td>\n",
       "      <td>48585.654664</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>500000.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GRADUATE_YEAR</th>\n",
       "      <td>504.0</td>\n",
       "      <td>3.412698</td>\n",
       "      <td>0.985137</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GENDER</th>\n",
       "      <td>504.0</td>\n",
       "      <td>1.160714</td>\n",
       "      <td>0.367632</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WORK_POWER</th>\n",
       "      <td>504.0</td>\n",
       "      <td>1.059524</td>\n",
       "      <td>0.236838</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_10</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.003968</td>\n",
       "      <td>0.062931</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_20</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.055556</td>\n",
       "      <td>0.229289</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_30</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.119048</td>\n",
       "      <td>0.324166</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_40</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.031746</td>\n",
       "      <td>0.175497</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_50</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.013889</td>\n",
       "      <td>0.117146</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_60</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.224206</td>\n",
       "      <td>0.417473</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_70</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.311508</td>\n",
       "      <td>0.463570</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_80</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.047619</td>\n",
       "      <td>0.213170</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_90</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.003968</td>\n",
       "      <td>0.062931</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EDU_EXPERIENCE_99</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.188492</td>\n",
       "      <td>0.391493</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MARRIAGE_1</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.085317</td>\n",
       "      <td>0.279631</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MARRIAGE_2</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.789683</td>\n",
       "      <td>0.407939</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MARRIAGE_3</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.039683</td>\n",
       "      <td>0.195406</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MARRIAGE_4</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.085317</td>\n",
       "      <td>0.279631</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_1</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.013889</td>\n",
       "      <td>0.117146</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_2</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.051587</td>\n",
       "      <td>0.221412</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_3</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.178571</td>\n",
       "      <td>0.383374</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_4</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.150794</td>\n",
       "      <td>0.358203</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_9</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.605159</td>\n",
       "      <td>0.489302</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_TYPE_0</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.049603</td>\n",
       "      <td>0.217339</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_TYPE_1</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.031746</td>\n",
       "      <td>0.175497</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_TYPE_3</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.101190</td>\n",
       "      <td>0.301880</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_TYPE_4</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.103175</td>\n",
       "      <td>0.304489</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_TYPE_5</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.454365</td>\n",
       "      <td>0.498408</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_TYPE_6</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.031746</td>\n",
       "      <td>0.175497</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_TYPE_y</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.073413</td>\n",
       "      <td>0.261072</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OCCUPATION_TYPE_z</th>\n",
       "      <td>504.0</td>\n",
       "      <td>0.154762</td>\n",
       "      <td>0.362037</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   count          mean           std   min   25%   50%   75%  \\\n",
       "AGE                504.0     44.952381      9.088132  21.0  38.0  46.0  52.0   \n",
       "WORK_SIZE          504.0      2.041667      0.997138   0.0   2.0   2.0   2.0   \n",
       "CURR_FREEZE_VALUE  504.0  17843.253968  48585.654664   0.0   0.0   0.0   0.0   \n",
       "GRADUATE_YEAR      504.0      3.412698      0.985137   0.0   3.0   4.0   4.0   \n",
       "GENDER             504.0      1.160714      0.367632   1.0   1.0   1.0   1.0   \n",
       "WORK_POWER         504.0      1.059524      0.236838   1.0   1.0   1.0   1.0   \n",
       "EDU_EXPERIENCE_10  504.0      0.003968      0.062931   0.0   0.0   0.0   0.0   \n",
       "EDU_EXPERIENCE_20  504.0      0.055556      0.229289   0.0   0.0   0.0   0.0   \n",
       "EDU_EXPERIENCE_30  504.0      0.119048      0.324166   0.0   0.0   0.0   0.0   \n",
       "EDU_EXPERIENCE_40  504.0      0.031746      0.175497   0.0   0.0   0.0   0.0   \n",
       "EDU_EXPERIENCE_50  504.0      0.013889      0.117146   0.0   0.0   0.0   0.0   \n",
       "EDU_EXPERIENCE_60  504.0      0.224206      0.417473   0.0   0.0   0.0   0.0   \n",
       "EDU_EXPERIENCE_70  504.0      0.311508      0.463570   0.0   0.0   0.0   1.0   \n",
       "EDU_EXPERIENCE_80  504.0      0.047619      0.213170   0.0   0.0   0.0   0.0   \n",
       "EDU_EXPERIENCE_90  504.0      0.003968      0.062931   0.0   0.0   0.0   0.0   \n",
       "EDU_EXPERIENCE_99  504.0      0.188492      0.391493   0.0   0.0   0.0   0.0   \n",
       "MARRIAGE_1         504.0      0.085317      0.279631   0.0   0.0   0.0   0.0   \n",
       "MARRIAGE_2         504.0      0.789683      0.407939   0.0   1.0   1.0   1.0   \n",
       "MARRIAGE_3         504.0      0.039683      0.195406   0.0   0.0   0.0   0.0   \n",
       "MARRIAGE_4         504.0      0.085317      0.279631   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_1       504.0      0.013889      0.117146   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_2       504.0      0.051587      0.221412   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_3       504.0      0.178571      0.383374   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_4       504.0      0.150794      0.358203   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_9       504.0      0.605159      0.489302   0.0   0.0   1.0   1.0   \n",
       "OCCUPATION_TYPE_0  504.0      0.049603      0.217339   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_TYPE_1  504.0      0.031746      0.175497   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_TYPE_3  504.0      0.101190      0.301880   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_TYPE_4  504.0      0.103175      0.304489   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_TYPE_5  504.0      0.454365      0.498408   0.0   0.0   0.0   1.0   \n",
       "OCCUPATION_TYPE_6  504.0      0.031746      0.175497   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_TYPE_y  504.0      0.073413      0.261072   0.0   0.0   0.0   0.0   \n",
       "OCCUPATION_TYPE_z  504.0      0.154762      0.362037   0.0   0.0   0.0   0.0   \n",
       "\n",
       "                        max  \n",
       "AGE                    75.0  \n",
       "WORK_SIZE               6.0  \n",
       "CURR_FREEZE_VALUE  500000.0  \n",
       "GRADUATE_YEAR           4.0  \n",
       "GENDER                  2.0  \n",
       "WORK_POWER              2.0  \n",
       "EDU_EXPERIENCE_10       1.0  \n",
       "EDU_EXPERIENCE_20       1.0  \n",
       "EDU_EXPERIENCE_30       1.0  \n",
       "EDU_EXPERIENCE_40       1.0  \n",
       "EDU_EXPERIENCE_50       1.0  \n",
       "EDU_EXPERIENCE_60       1.0  \n",
       "EDU_EXPERIENCE_70       1.0  \n",
       "EDU_EXPERIENCE_80       1.0  \n",
       "EDU_EXPERIENCE_90       1.0  \n",
       "EDU_EXPERIENCE_99       1.0  \n",
       "MARRIAGE_1              1.0  \n",
       "MARRIAGE_2              1.0  \n",
       "MARRIAGE_3              1.0  \n",
       "MARRIAGE_4              1.0  \n",
       "OCCUPATION_1            1.0  \n",
       "OCCUPATION_2            1.0  \n",
       "OCCUPATION_3            1.0  \n",
       "OCCUPATION_4            1.0  \n",
       "OCCUPATION_9            1.0  \n",
       "OCCUPATION_TYPE_0       1.0  \n",
       "OCCUPATION_TYPE_1       1.0  \n",
       "OCCUPATION_TYPE_3       1.0  \n",
       "OCCUPATION_TYPE_4       1.0  \n",
       "OCCUPATION_TYPE_5       1.0  \n",
       "OCCUPATION_TYPE_6       1.0  \n",
       "OCCUPATION_TYPE_y       1.0  \n",
       "OCCUPATION_TYPE_z       1.0  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics, transposed so that each feature is one row.\n",
    "train_X.describe().transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
